# Functions

# Plot color palette
plot_color_palette <- function(input_cols) {
  
  col_data <- tibble(color = input_cols) %>%
    mutate(color =  fct_inorder(color))
  
  res <- col_data %>%
    ggplot(aes(x = "color", fill = color)) +
    geom_bar() +
    scale_fill_manual(values = input_cols) +
    theme_void()
  
  res
}

# Pull items from list of vectors
pull_nest_vec <- function(list_in, idx) {
  res <- map_chr(list_in, ~ .x[[idx]])
  
  res
}

# Capitalize first character without modifying other characters
str_to_title_v2 <- function(string) {
  
  cap_first_chr <- function(string) {
    chrs <- string %>%
      str_split(pattern = "") %>%
      unlist()
    
    if (any(chrs %in% LETTERS)) {
      return(string)
    }
    
    chrs[1] <- str_to_upper(chrs[1])
    
    res <- chrs %>%
      reduce(str_c)
    
    res
  }
  
  res <- string %>%
    map_chr(~ {
      .x %>%
        str_split(pattern = " ") %>%
        unlist() %>%
        map_chr(cap_first_chr) %>%
        reduce(str_c, sep = " ")
    })
  
  res
}

# Set colors
set_cols <- function(types_in, cols_in, other_cols) {
  
  types_in <- types_in[!types_in %in% names(other_cols)]
  cols_in <- cols_in[!cols_in %in% other_cols]
  
  names(cols_in) <- types_in
  cols_in <- cols_in[!is.na(names(cols_in))]
  
  res <- c(cols_in, other_cols)
  
  res
}

# Set subtype colors
set_type_cols <- function(type_in, sobjs_in, type_key, type_column = "subtype",
                          cols_in, other_cols) {
  
  sobjs_in <- sobjs_in[type_key[names(sobjs_in)] == type_in]
  
  res <- sobjs_in %>%
    map(~ {
      .x@meta.data %>%
        pull(type_column) %>%
        unique()
    }) %>%
    reduce(c) %>%
    unique() %>%
    set_cols(
      cols_in = cols_in,
      other_cols = other_cols
    )
  
  res
}

# Import matrices and create Seurat object
create_sobj <- function(matrix_dir, proj_name, gene_min = 100, gene_max = 5000, 
                        mito_max = 25, mito_str = "^mt-", min_cells = 0, ...) {
  
  # Load matrices
  mat_list <- Read10X(matrix_dir)
  rna_mat <- mat_list
  
  # Create Seurat object using gene expression data
  if (is_list(mat_list)) {
    rna_mat <- mat_list[["Gene Expression"]]
  }
  
  res <- rna_mat %>%
    CreateSeuratObject(
      project = proj_name,
      min.cells = min_cells
    )
  
  # Add antibody capture data to Seurat object
  if (is_list(mat_list)) {
    adt_mat <- res %>%
      colnames() %>%
      mat_list[["Antibody Capture"]][, .]
    
    res[["ADT"]] <- CreateAssayObject(adt_mat)
  }
  
  # Calculate percentage of mitochondrial reads
  res <- res %>%
    PercentageFeatureSet(
      pattern = mito_str, 
      col.name = "Percent_mito"
    )
  
  # Add QC classifications to meta.data
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(qc_class = "Pass filters") %>%
    column_to_rownames("cell_ids")
  
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      qc_class = str_replace(qc_class, "Singlet", "Pass filters"),
      qc_class = ifelse(nFeature_RNA <= gene_min, "Low gene count", qc_class),
      qc_class = ifelse(nFeature_RNA >= gene_max, "High gene count", qc_class),
      qc_class = ifelse(Percent_mito >= mito_max, "High mito reads", qc_class)
    ) %>%
    column_to_rownames("cell_id")
  
  res
}

# Filter and normalize matrices
norm_sobj <- function(sobj_in, sample_order = NULL) {
  res <- sobj_in %>%
    subset(qc_class == "Pass filters") %>%
    NormalizeData(normalization.method = "LogNormalize")
  
  if ("ADT" %in% names(res)) {
    res <- res %>%
      NormalizeData(
        assay = "ADT",
        normalization.method = "CLR"
      )
  }
  
  # Add meta.data columns
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
    column_to_rownames("cell_ids")
  
  res
}

# Cluster gene expression data
cluster_RNA <- function(sobj_in, assay = "RNA", resolution = 1, 
                        dims = 1:30, prefix = "", ...) {
  # Use FindNeighbors to construct a K-nearest neighbors graph based on the euclidean distance in 
  # PCA space, and refine the edge weights between any two cells based on the
  # shared overlap in their local neighborhoods (Jaccard similarity).
  # Use FindClusters to apply modularity optimization techniques such as the Louvain algorithm 
  # (default) or SLM, to iteratively group cells together
  
  # Perform PCA
  # By default only variable features are used for PCA
  res <- sobj_in %>%
    RunPCA(assay = assay, ...) %>%
    AddMetaData(
      metadata = FetchData(., c("PC_1", "PC_2")),
      col.name = str_c(prefix, c("PC_1", "PC_2"))
    )
    
    # Create nearest neighbors graph and find clusters
  res <- res %>%
    FindNeighbors(
      assay     = assay,
      reduction = "pca",
      dims      = dims
    ) %>%
    FindClusters(
      resolution = resolution,
      verbose    = F
    ) %>%
    AddMetaData(
      metadata = Idents(.),
      col.name = str_c(assay, "_clusters")
    )
    
    # Run UMAP, UMAP coordinates will get added to the meta.data by clustifyr
  res <- res %>%
    RunUMAP(
      assay = assay,
      dims  = dims,
      reduction.name = str_c(prefix, "umap"),
      reduction.key  = str_c(prefix, "UMAP_")
    )
  
  res
}

# Subset Seurat objects for plotting
subset_sobj <- function(sobj_in, name, type_vec, include_list, type_column = "cell_type1", subtype_column = "cell_type2",
                        min_cells = 15, orig_idents = c("_1" = "CD45_neg", "_2" = "CD45_pos"),
                        type_filt = c("DC" = "CD45_pos", "LEC" = "CD45_neg", "fibroblast" = "CD45_neg"),
                        cap_names = T, ...) {
  
  res <- sobj_in
  
  type_in <- type_vec[[name]]
  include_types <- include_list[[name]]
  
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      cell_type = !!sym(type_column),
      subtype = !!sym(subtype_column)
    ) %>%
    column_to_rownames("cell_id")

  # Set orig.ident based on cell_id suffix
  if (!is.null(orig_idents)) {
    res@meta.data <- res@meta.data %>%
      rownames_to_column("cell_id") %>%
      mutate(
        orig.ident = str_extract(cell_id, "_[0-9]+$"),
        orig.ident = orig_idents[orig.ident]
      ) %>%
      column_to_rownames("cell_id")
  }
  
  # Filter cells based on orig.ident
  if (!is.null(type_filt)) {
    res <- res %>%
      subset(subset = orig.ident == type_filt[type_in])
  }
  
  # Filter cells based on input cell type
  res <- res %>%
    subset(subset = cell_type %in% c(type_in, include_types))
  
  if (!type_in %in% pull(res@meta.data, cell_type)) {
    stop(str_c("ERROR: Cell type not present after filtering for ", CD45_status))
  }
  
  # Filter cells based on number of cells per subtype
  if (!is.null(min_cells)) {
    keep_cells <- res@meta.data %>%
      rownames_to_column("cell_id") %>%
      group_by(subtype) %>%
      filter(n_distinct(cell_id) > min_cells) %>%
      pull(cell_id)
    
    res <- res %>%
      subset(cells = keep_cells)
  }
  
  # Re-run UMAP
  res <- res %>%
    RunPCA() %>%
    RunUMAP(dims = 1:40) %>%
    AddMetaData(FetchData(., c("PC_1", "PC_2"))) %>%
    AddMetaData(FetchData(., c("UMAP_1", "UMAP_2")))

  # Capitalize subtypes
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(subtype = str_to_title_v2(subtype)) %>%
    column_to_rownames("cell_id")
  
  res
}

# Re-cluster and run clustify
run_clustifyr <- function(sobj_in, name, type_vec, resolution = 1.4, type_column = "cell_type",
                          subtype_column = "subtype", clust_column = "seurat_clusters") {
  
  res <- sobj_in
  
  type_in <- type_vec[[name]]
  
  ref <- params$sobj_refs[[type_in]] %>%
    here()
  
  if (is_empty(ref)) {
    return(res)
  }
  
  load(ref)
  
  ref <- ref %>%
    basename() %>% 
    str_remove("\\.rda$")
  
  res <- sobj_in %>%
    FindNeighbors(
      reduction = "pca",
      dims = 1:30
    ) %>%
    FindClusters(
      resolution = resolution,
      verbose = F
    ) %>%
    clustify(
      ref_mat = eval(parse(text = ref)),
      cluster_col = clust_column
    )
  
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_id") %>%
    mutate(
      !!sym(subtype_column) := if_else(!!sym(type_column) == type_in, type, !!sym(type_column)),
      !!sym(subtype_column) := str_to_title_v2(!!sym(subtype_column))
    ) %>%
    rename(
      UMAP_1 = UMAP_1.x,
      UMAP_2 = UMAP_2.x
    ) %>%
    column_to_rownames("cell_id")
  
  res
}

# Filter list of Seurat objects for patient, normalize and merge objects 
merge_sobj <- function(sobj_list, sample_order = NULL) {

  res <- merge(
    x = sobj_list[[1]],
    y = sobj_list[2:length(sobj_list)],
    add.cell.ids = names(sobj_list)
  ) %>%
    ScaleData(assay = "RNA") %>%
    ScaleData(assay = "adt") %>%
    FindVariableFeatures(assay = "RNA")
  
  # Set sample order
  res@meta.data <- res@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
    column_to_rownames("cell_ids")
  
  res
}

# Fit gaussian mixture model for given signal
fit_GMM <- function(sobj_in, data_column = "adt_ovalbumin") {
  
  set.seed(42)
  
  # Fit GMM for OVA signal
  data_df <- sobj_in %>%
    FetchData(data_column)
  
  mixmdl <- data_df %>%
    pull(data_column) %>%
    normalmixEM()
  
  # New column names
  ova_names <- c("Low", "High")
  comp_names <- c("comp.1", "comp.2")
  
  if (mixmdl$mu[1] > mixmdl$mu[2]) {
    ova_names <- rev(ova_names)
  }
  
  names(comp_names)    <- ova_names
  names(mixmdl$mu)     <- ova_names
  names(mixmdl$sigma)  <- ova_names
  names(mixmdl$lambda) <- ova_names

  # Divide into OVA groups
  res <- data.frame(
    cell_id = rownames(data_df),
    data = data_df[, data_column],
    mixmdl$posterior
  ) %>%
    dplyr::rename(!!sym(data_column) := data) %>%
    rename(all_of(comp_names)) %>%
    mutate(GMM_grp = if_else(Low > 0.5, "Low", "High")) %>%
    column_to_rownames("cell_id")
  
  res <- list(
    res    = res,
    mu     = mixmdl$mu,
    sigma  = mixmdl$sigma,
    lambda = mixmdl$lambda
  )
  
  res
}

# Add distribution of GMM component to plot
add_stat_fun <- function(gmm_in, cols_in, key) {
  # dnorm provides density for the normal distribution given the mean and
  # standard deviation. Lambda is used to adjust for the mixture composition.
  # mu: Mean of component
  # sig: Standard deviation of component
  # lam: Lambda of component (mixture weight)
  plot_mix_comps <- function(x, mu, sigma, lam) {
    lam * dnorm(x, mu, sigma)
  }
  
  stat_function(
    geom  = "line",
    fun   = plot_mix_comps,
    args  = list(gmm_in$mu[key], gmm_in$sigma[key], gmm_in$lambda[key]),
    color = cols_in[key],
    lwd   = 1
  )
}

# Overlay feature data on UMAP or tSNE
# Cannot change number of columns when using FeaturePlot with split.by
plot_features <- function(sobj_in, x = "UMAP_1", y = "UMAP_2", feature, data_slot = "data", 
                          split_id = NULL, pt_size = 0.25, pt_outline = NULL, plot_cols = NULL,
                          feat_levels = NULL, split_levels = NULL, min_pct = NULL, max_pct = NULL, 
                          calc_cor = F, lm_line = F, lab_size = 3.7, lab_pos = c(0.8, 0.9), ...) {
  
  # Format imput data
  counts <- sobj_in
  
  if ("Seurat" %in% class(sobj_in)) {
    vars <- c(x, y, feature)
    
    if (!is.null(split_id)) {
      vars <- c(vars, split_id)
    }

    counts <- sobj_in %>%
      FetchData(vars = unique(vars), slot = data_slot) %>%
      as_tibble(rownames = "cell_ids")
  }
  
  # Rename features
  if (!is.null(names(feature))) {
    counts <- counts %>%
      rename(!!!syms(feature))
    
    feature <- names(feature)
  }
  
  if (!is.null(names(x))) {
    counts <- counts %>%
      rename(!!!syms(x))
    
    x <- names(x)
  }
  
  if (!is.null(names(y))) {
    counts <- counts %>%
      rename(!!!syms(y))
    
    y <- names(y)
  }
  
  # Set min and max values for feature
  if (!is.null(min_pct) || !is.null(max_pct)) {
    counts <- counts %>%
      mutate(
        pct_rank = percent_rank(!!sym(feature)),
        max_val  = ifelse(pct_rank > max_pct, !!sym(feature), NA),
        max_val  = min(max_val, na.rm = T),
        min_val  = ifelse(pct_rank < min_pct, !!sym(feature), NA),
        min_val  = max(min_val, na.rm = T),
        !!sym(feature) := if_else(!!sym(feature) > max_val, max_val, !!sym(feature)),
        !!sym(feature) := if_else(!!sym(feature) < min_val, min_val, !!sym(feature))
      )
  }

  # Set feature order
  if (!is.null(feat_levels)) {
    counts <- counts %>%
      mutate(!!sym(feature) := fct_relevel(!!sym(feature), feat_levels))
  }
  
  # Set facet order
  if (!is.null(split_id) && length(split_id) == 1) {
    counts <- counts %>%
      mutate(split_id = !!sym(split_id))
    
    if (!is.null(split_levels)) {
      counts <- counts %>%
        mutate(split_id = fct_relevel(split_id, split_levels))
    }
  }
  
  # Calculate correlation
  if (calc_cor) {
    if (!is.null(split_id)) {
      counts <- counts %>%
        group_by(split_id)
    }
    
    counts <- counts %>%
      mutate(
        cor_lab = cor(!!sym(x), !!sym(y)),
        cor_lab = round(cor_lab, digits = 2),
        cor_lab = str_c("r = ", cor_lab),
        min_x   = min(!!sym(x)),
        max_x   = max(!!sym(x)),
        min_y   = min(!!sym(y)),
        max_y   = max(!!sym(y)),
        lab_x   = (max_x - min_x) * lab_pos[1] + min_x,
        lab_y   = (max_y - min_y) * lab_pos[1] + min_y
      )
  }
  
  # Create scatter plot
  # To add outline for each cluster create separate layers
  res <- counts %>%
    arrange(!!sym(feature))
  
  if (!is.null(pt_outline)) {
    
    if (!is.numeric(counts[[feature]])) {
      res <- res %>%
        ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature), fill = !!sym(feature)))
      
      feats <- counts[[feature]] %>%
        unique()
      
      if (!is.null(feat_levels)) {
        feats <- feat_levels[feat_levels %in% feats]
      }
      
      for (feat in feats) {
        f_counts <- counts %>%
          filter(!!sym(feature) == feat)
        
        res <- res +
          geom_point(data = f_counts, aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
          geom_point(data = f_counts, size = pt_size)
      }
      
    } else {
      res <- res %>%
        ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
        geom_point(aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F) +
        geom_point(size = pt_size)
    }
    
  } else {
    res <- res %>%
      ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature))) +
      geom_point(size = pt_size)
  }

  # Add regression line
  if (lm_line) {
    res <- res +
      geom_smooth(method = "lm", se = F, color = "black", size = 0.5, linetype = 2)
  }
  
  # Add correlation coefficient label
  if (calc_cor) {
    res <- res +
      geom_text(
        aes(x = lab_x, lab_y, label = cor_lab),
        color = "black",
        size  = lab_size,
        check_overlap = T, 
        show.legend = F
      )
  }
  
  # Set feature colors
  if (!is.null(plot_cols)) {
    if (is.numeric(counts[[feature]])) {
      res <- res +
        scale_color_gradient(low = plot_cols[1], high = plot_cols[2])

    } else {
      res <- res +
        scale_color_manual(values = plot_cols) +
        scale_fill_manual(values = plot_cols)
    }
  }
  
  # Split plot into facets
  if (!is.null(split_id)) {
    if (length(split_id) == 1) {
      res <- res +
        facet_wrap(~ split_id, ...)
      
    } else if (length(split_id) == 2) {
      eq <- str_c(split_id[1], " ~ ", split_id[2])
      
      res <- res +
        facet_grid(as.formula(eq), ...)
    }
  }
  
  res
}

# Run gprofiler
run_gprofiler <- function(gene_list, genome = NULL, gmt_id = NULL, p_max = params$p_max_GO,
                          order_query = params$order_query, dbases = c("GO:BP", "GO:MF", "KEGG"), ...) {
  
  # Check for empty gene list
  if (is_empty(gene_list)) {
    return(as_tibble(NULL))
  }
  
  # Check arguments
  if (is.null(genome) && is.null(gmt_id)) {
    stop("ERROR: Must specifiy genome or gmt_id")
  }
  
  # Retrieve organism name for gProfileR
  if (!is.null(genome)){
    genomes <- list(
      GRCm = "mmusculus",
      GRCh = "hsapiens",
      BDGP = "dmelanogaster"
    )
    
    org <- genome %>% 
      str_remove("[0-9]+$") %>%
      genomes[[.]]
  }
  
  if (!is.null(gmt_id)) {
    org <- gmt_id
    dbases <- NULL
  }
  
  # Run gProfileR
  res <- gene_list %>%
    gost(
      organism       = org,
      sources        = dbases,
      domain_scope   = "annotated",
      significant    = T,
      user_threshold = p_max,
      ordered_query  = order_query,
      ...
    )
  
  # Format and sort output data.frame
  res <- as_tibble(res$result)
  
  if (!is_empty(res)) {
    res <- res %>%
      arrange(source, p_value)
  }
  
  res
}

# Create GO bubble plot
create_bubbles <- function(GO_df, plot_colors = theme_cols[c(1:2, 4, 9)],
                           n_terms = 15) {

  # Check for empty inputs
  if (is_empty(GO_df) || nrow(GO_df) == 0) {
    res <- ggplot() +
      geom_blank()
    
    return(res)
  }
  
  # Shorten GO terms and database names
  GO_data <- GO_df %>%
    mutate(
      term_id = str_remove(term_id, "(GO|KEGG):"),
      term_id = str_c(term_id, " ", term_name),
      term_id = str_to_lower(term_id),
      term_id = str_trunc(term_id, 40, "right"),
      source  = fct_recode(
        source,
        "Biological\nProcess" = "GO:BP",
        "Cellular\nComponent" = "GO:CC",
        "Molecular\nFunction" = "GO:MF",
        "KEGG"                = "KEGG"
      )
    )
  
  # Reorder database names
  plot_levels <- c(
    "Biological\nProcess",
    "Cellular\nComponent",
    "Molecular\nFunction",
    "KEGG"
  )
  
  GO_data <- GO_data %>%
    mutate(source = fct_relevel(source, plot_levels))
  
  # Extract top terms for each database
  top_GO <- GO_data %>%
    group_by(source) %>%
    arrange(p_value) %>%
    dplyr::slice(1:n_terms) %>%
    ungroup()
  
  # Create bubble plots
  res <- GO_data %>%
    ggplot(aes(1.25, -log10(p_value), size = intersection_size)) +
    geom_point(color = plot_colors, alpha = 0.5, show.legend = T) +
    geom_text_repel(
      aes(2, -log10(p_value), label = term_id),
      data         = top_GO,
      size         = 2.3,
      direction    = "y",
      hjust        = 0,
      segment.size = NA
    ) +
    xlim(1, 8) +
    labs(y = "-log10(p-value)") +
    theme_info +
    theme(
      axis.title.x    = element_blank(),
      axis.text.x     = element_blank(),
      axis.ticks.x    = element_blank()
    ) +
    facet_wrap(~ source, scales = "free", nrow = 1)
  
  res
}

# Plot percentage of cells in given groups
plot_cell_count <- function(sobj_in, group_id, split_id = NULL, group_order = NULL,
                            fill_id, plot_colors = theme_cols,
                            x_lab = "Cell type", y_lab = "Fraction of cells",
                            bar_pos = "fill", order_count = T, bar_line = 0, ...) {
  
  res <- sobj_in@meta.data %>%
    rownames_to_column("cell_ids") %>%
    mutate(
      group_id := !!sym(group_id),
      fill_id  := !!sym(fill_id)
    )
  
  if (!is.null(group_order)) {
    res <- res %>%
      mutate(group_id = fct_relevel(group_id, group_order))
  }
  
  if (!is.null(split_id)) {
    res <- res %>%
      mutate(split_id := !!sym(split_id))
  }
  
  if (order_count) {
    res <- res %>%
      mutate(fill_id = fct_reorder(fill_id, cell_ids, n_distinct))
  }

  res <- res %>%
    ggplot(aes(group_id, fill = fill_id)) +
    geom_bar(position = bar_pos, size = bar_line, color = "black") +
    scale_fill_manual(values = plot_colors) +
    labs(x = x_lab, y = y_lab) +
    theme_info

  if (!is.null(split_id)) {
    res <- res +
      facet_wrap(~ split_id, ...)
  }
  
  res
}

# Find markers with presto
find_markers <- function(input_sobj, group_column = NULL, uniq = params$uniq_markers, 
                         FC_min = params$FC_min, auc_min = params$auc_min, p_max = params$p_max_markers, 
                         pct_in_min = params$pct_in_min, pct_out_max = params$pct_out_max, ...) {
  
  res <- input_sobj %>%
    wilcoxauc(group_by = group_column) %>%
    as_tibble() %>%
    filter(
      padj    < p_max,
      logFC   > FC_min,
      auc     > auc_min,
      pct_in  > pct_in_min,
      pct_out < pct_out_max
    ) %>%
    arrange(desc(logFC))
  
  if (uniq) {
    res <- res %>%
      group_by(feature) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  res
}

# Find cluster markers for each separate cell type
find_group_markers <- function(input_grp, input_sobj, grp_column, clust_column) {
  
  res <- input_sobj %>%
    subset(!!sym(grp_column) == input_grp)

  clusts <- res@meta.data %>%
    pull(clust_column)

  if (n_distinct(clusts) < 2) {
    return(NULL)
  }

  res <- res %>%
    find_markers(group_column = clust_column) %>%
    mutate(cell_type = input_grp)

  res
}

# Create reference UMAP for comparisons
create_ref_umap <- function(input_sobj, pt_mtplyr = 1, color_guide, ...) {
  
  res <- input_sobj %>%
    plot_features(
      pt_size     = 0.1 * pt_mtplyr,
      pt_outline  = 0.1 * pt_mtplyr + 0.3,
      ...
    ) +
    guides(color = color_guide) +
    blank_theme +
    theme(
      legend.position = "top",
      legend.title    = element_blank(),
      legend.text     = element_text(size = 10)
    )
  
  res
}

# Create UMAPs showing marker gene signal
create_marker_umaps <- function(input_sobj, input_markers, umap_col, add_outline = NULL, pt_mtplyr = 1) {
  
  # pt_size <- 0.25 * pt_mtplyr
  pt_size <- 0.1 * pt_mtplyr
  
  res <- input_markers %>%
    map(~ {
      input_sobj %>%
        plot_features(
          feature    = .x, 
          plot_cols  = c("#fafafa", umap_col),
          pt_outline = add_outline,
          pt_size    = pt_size
        ) +
        ggtitle(.x) +
        blank_theme +
        theme(
          plot.title        = element_text(size = 13),
          legend.position   = "bottom",
          legend.title      = element_blank(),
          legend.text       = element_text(size = 8),
          legend.key.height = unit(0.1, "cm"),
          legend.key.width  = unit(0.3, "cm"),
          axis.title.y      = element_text(size = 13, color = "white"),
          axis.text.y       = element_text(size = 8, color = "white")
        )
    })
  
  res
}

# Create boxplots showing marker gene signal
create_marker_boxes <- function(input_sobj, input_markers, clust_column, box_cols,
                                group = NULL, include_legend = F, all_boxes = F,
                                all_violins = F, order_boxes = T, n_boxes = 10,
                                n_rows = 2, pt_mtplyr = 1, exclude_clust = NULL, ...) {
  
  # Retrieve and format data for boxplots
  input_markers <- input_markers %>%
    head(n_boxes)
  
  box_data <- input_sobj %>%
    FetchData(c(clust_column, input_markers)) %>%
    as_tibble(rownames = "cell_id") %>%
    filter(!(!!sym(clust_column) %in% exclude_clust)) %>%
    mutate(grp = str_remove(!!sym(clust_column), "^[a-zA-Z0-9_]+-"))  # This is specific for Shannon's subtype clusters
  
  input_markers <- input_markers %>%
    str_trunc(9)
  
  # Filter based on input group
  if (!is.null(group)) {
    box_data <- box_data %>%
      filter(grp == group)
  }
  
  # Format data for plots
  box_data <- box_data %>%
    pivot_longer(cols = c(-cell_id, -grp, -!!sym(clust_column)), names_to = "key", values_to = "Counts") %>%
    mutate(
      !!sym(clust_column) := fct_relevel(!!sym(clust_column), names(box_cols)),
      key = str_trunc(key, width = 9, side = "right"),
      key = fct_relevel(key, input_markers)
    )
  
  # Order boxes by mean signal
  if (order_boxes) {
    box_data <- box_data %>%
      mutate(!!sym(clust_column) := fct_reorder(!!sym(clust_column), Counts, mean, .desc = T))
  }
  
  n_clust <- box_data %>%
    pull(clust_column) %>%
    n_distinct()
  
  # Create plots
  n_cols <- ceiling(n_boxes / n_rows)
  
  res <- box_data %>%
    ggplot(aes(!!sym(clust_column), Counts, color = !!sym(clust_column))) + 
    facet_wrap(~ key, ncol = n_cols) +
    scale_color_manual(values = box_cols) +
    theme_info +
    theme(
      panel.spacing.x  = unit(0.7, "cm"),
      strip.background = element_blank(),
      strip.text       = element_text(size = 13),
      legend.position  = "none",
      axis.title.x     = element_blank(),
      axis.title.y     = element_text(size = 13),
      axis.text.x      = element_blank(),
      axis.text.y      = element_text(size = 8),
      axis.ticks.x     = element_blank(),
      axis.line.x      = element_blank()
    )
  
  # Adjust output plot type
  if (n_clust > 6 || all_boxes) {
    res <- res +
      geom_boxplot(aes(fill = !!sym(clust_column), color = !!sym(clust_column)), size = 0, outlier.color = "#f0f0f0", outlier.size = 0.25) +
      stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
      scale_fill_manual(values = box_cols) +
      theme(...)
    
  } else if (all_violins) {
    res <- res +
      geom_violin(aes(fill = !!sym(clust_column)), size = 0.2) +
      stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
      scale_fill_manual(values = box_cols) +
      scale_color_manual(values = box_cols) +
      theme(...)
    
  } else {
    pt_size <- 0.3 * pt_mtplyr
    
    res <- res +
      geom_quasirandom(size = pt_size) +
      theme(...)
  }
  
  # Add legend
  if (include_legend) {
    res <- res +
      guides(color = col_guide) +
      theme(legend.position = "top")
  }
  
  # Add blank space for missing facets
  n_keys <- n_distinct(box_data$key)
  
  if (n_keys <= n_cols && n_rows > 1) {
    n_keys <- if_else(n_keys == 1, 2, as.double(n_keys))
    n_cols <- floor(n_cols / n_keys)
    
    res <- res %>%
      plot_grid(
        ncol = n_cols,
        nrow = 2
      )
  }
  
  res
}

# Create figure summarizing marker genes
create_marker_fig <- function(input_sobj, input_markers, input_GO, clust_column, 
                              input_umap, umap_color, fig_heights = c(0.46, 0.3, 0.3), 
                              GO_genome = params$genome, box_colors, n_boxes = 10,
                              umap_outline = NULL, umap_mtplyr = 1, xlsx_name = NULL, 
                              sheet_name = NULL, ...) {
  
  blank_umap <- ggplot() +
    geom_blank() +
    theme_void()
  
  marks_umap  <- blank_umap
  marks_boxes <- blank_umap
  GO_bubbles  <- blank_umap
  
  # Create UMAPs showing marker gene signal
  if (nrow(input_markers) > 0) {
    top_marks <- input_markers$feature %>%
      head(n_boxes)
    
    clust_legend <- get_legend(input_umap)
    
    input_umap <- input_umap +
      theme(legend.position = "none")
    
    marks_umap <- input_sobj %>%
      create_marker_umaps(
        input_markers = head(top_marks, 7),
        umap_col      = umap_color,
        add_outline   = umap_outline,
        pt_mtplyr     = umap_mtplyr
      ) %>%
      append(list(input_umap), .)
    
    marks_umap <- plot_grid(
      plotlist = marks_umap,
      ncol     = 4,
      nrow     = 2,
      align    = "vh",
      axis     = "trbl"
    )
    
    marks_umap <- plot_grid(
      clust_legend, marks_umap,
      rel_heights = c(0.2, 0.9),
      nrow = 2
    )
    
    # Create boxplots showing marker gene signal
    marks_boxes <- input_sobj %>%
      create_marker_boxes(
        input_markers = top_marks,
        clust_column  = clust_column,
        box_cols      = box_colors,
        n_boxes       = n_boxes,
        plot.margin   = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
        ...
      )
    
    # Create GO term plots
    if (nrow(input_GO) > 0) {
      GO_bubbles <- input_GO %>%
        create_bubbles(plot_colors = umap_color) +
        theme(
          plot.margin      = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
          strip.background = element_blank(),
          strip.text       = element_text(size = 13),
          axis.title.y     = element_text(size = 13),
          axis.text.y      = element_text(size = 8),
          axis.line.x      = element_blank(),
          legend.position  = "bottom",
          legend.title     = element_blank(),
          legend.text      = element_text(size = 8)
        )
      
      # Write GO terms to excel file 
      if (!is.null(xlsx_name)) {
        input_GO %>%
          dplyr::select(
            term_name,  term_id,
            source,     effective_domain_size,
            query_size, intersection_size,
            p_value,    significant 
          ) %>%
          arrange(source, p_value) %>%
          write.xlsx(
            file      = str_c(xlsx_name, "_GO.xlsx"),
            sheetName = sheet_name,
            append    = T
          )
      }
    }
    
    # Write markers to excel file
    if (!is.null(xlsx_name)) {
      input_markers %>%
        write.xlsx(
          file      = str_c(xlsx_name, "_markers.xlsx"),
          sheetName = sheet_name,
          append    = T
        )
    }
  }
  
  # Create final figure
  res <- plot_grid(
    marks_umap, marks_boxes, GO_bubbles,
    rel_heights = fig_heights,
    ncol        = 1
  )
  
  res
}

# Filter clusters and set cluster order
set_cluster_order <- function(input_cols, input_marks, n_cutoff = 5) {
  input_marks <- input_marks %>%
    group_by(group) %>%
    filter(n() >= n_cutoff) %>%
    ungroup()
  
  marks <- unique(input_marks$group)
  res   <- names(input_cols)
  res   <- res[res %in% marks]
  
  res
}

# Create v1 panel for marker genes
create_marker_panel_v1 <- function(input_sobj, input_cols, input_umap = NULL, clust_column, order_boxes = T,
                                   color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
                                   uniq = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, exclude_clust = NULL, 
                                   ...) {
  
  # Set point size
  # ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
  umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
  ref_mtplyr <- umap_mtplyr
  
  # Find marker genes
  markers <- input_sobj %>%
    find_markers(group_column = clust_column)
  
  # Find GO terms
  GO_df <- markers
  
  if (nrow(markers) > 0) {
    GO_df <- markers %>%
      group_by(group) %>%
      do({
        arrange(., desc(logFC)) %>%
          pull(feature) %>%
          run_gprofiler(genome = params$genome)
      }) %>%
      ungroup()
  }
  
  if (uniq && nrow(GO_df) > 0) {
    GO_df <- GO_df %>%
      group_by(term_id) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  # Set cluster order based on order of input_cols
  fig_clusters <- input_cols %>%
    set_cluster_order(markers)
  
  fig_clusters <- fig_clusters[!fig_clusters %in% exclude_clust]
  
  # Create figures
  for (i in seq_along(fig_clusters)) {
    cat("\n#### ", fig_clusters[i], "\n", sep = "")
    
    # Filter markers and GO terms
    clust <- fig_clusters[i]
    
    fig_marks <- markers %>%
      filter(group == clust)
    
    fig_GO <- GO_df %>%
      filter(group == clust)
    
    # Create reference umap
    ref_umap <- input_umap
    umap_col <- input_cols[clust]
    
    if (is.null(input_umap)) {
      umap_levels <- input_cols[names(input_cols) != clust]
      umap_levels <- names(c(umap_levels, umap_col))
      
      ref_umap <- input_sobj %>%
        create_ref_umap(
          feature     = clust_column,
          plot_cols   = input_cols,
          feat_levels = umap_levels,
          pt_mtplyr   = ref_mtplyr,
          color_guide = color_guide
        )
    }
    
    # Create panel
    marker_fig <- input_sobj %>%
      create_marker_fig(
        input_markers = fig_marks,
        input_GO      = fig_GO,
        clust_column  = clust_column,
        input_umap    = ref_umap,
        umap_color    = umap_col,
        box_colors    = input_cols,
        order_boxes   = order_boxes,
        umap_mtplyr   = umap_mtplyr,
        xlsx_name     = xlsx_name,
        sheet_name    = clust,
        exclude_clust = exclude_clust,
        ...
      )
    
    cat(nrow(fig_marks), "marker genes were identified,", nrow(fig_GO), "GO terms were identified.")
    print(marker_fig)
    cat("\n\n---\n\n<br>\n\n<br>\n\n")
  }
}

# Create v2 panel that splits plots into groups
create_marker_panel_v2 <- function(input_sobj, input_markers, input_cols, grp_column, clust_column,
                                   color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)), 
                                   uniq_GO = params$uniq_GO, umap_mtplyr = 6, xlsx_name = NULL, clust_regex = "^[a-zA-Z0-9_]+-",
                                   ...) {
  
  # Set point size
  # ref_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr * 2.5, 1)
  umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
  ref_mtplyr <- umap_mtplyr
  
  # Figure colors and order
  fig_clusters <- input_cols %>%
    set_cluster_order(input_markers)
  
  # Find GO terms
  GO_df <- input_markers %>%
    group_by(group) %>%
    do({
      arrange(., desc(logFC)) %>%
        pull(feature) %>%
        run_gprofiler(genome = params$genome)
    }) %>%
    ungroup()
  
  if (uniq_GO && nrow(GO_df) > 0) {
    GO_df <- GO_df %>%
      group_by(term_id) %>%
      filter(n() == 1) %>%
      ungroup()
  }
  
  # Create figures
  for (i in seq_along(fig_clusters)) {
    cat("\n#### ", fig_clusters[i], "\n", sep = "")
    
    # Filter markers and GO terms
    clust <- fig_clusters[i]
    
    fig_marks <- input_markers %>%
      filter(group == clust)
    
    fig_GO <- GO_df %>%
      filter(group == clust)
    
    # Set colors
    umap_col <- input_cols[clust]
    
    group <- clust %>%
      str_remove(clust_regex)
    
    grp_regex <- str_c("-", group, "$") %>%
      str_replace("\\+", "\\\\+")            # include this to escape "+" in names
    
    fig_cols <- input_cols[grepl(grp_regex, names(input_cols))]
    fig_cols <- c( "Other" = "#fafafa", fig_cols)
    ref_cols <- fig_cols[names(fig_cols) != clust]
    ref_cols <- c(ref_cols, umap_col)
    
    # Create reference UMAP
    ref_umap <- input_sobj %>%
      FetchData(c("UMAP_1", "UMAP_2", grp_column, clust_column)) %>%
      as_tibble(rownames = "cell_id") %>%
      mutate(!!sym(clust_column) := if_else(
        !!sym(grp_column) != group, 
        "Other", 
        !!sym(clust_column)
      )) %>%
      create_ref_umap(
        feature     = clust_column,
        plot_cols   = ref_cols,
        feat_levels = names(ref_cols),
        pt_mtplyr   = ref_mtplyr,
        color_guide = color_guide
      )
    
    # Create panel
    marker_fig <- input_sobj %>%
      create_marker_fig(
        input_markers = fig_marks,
        input_GO      = fig_GO,
        clust_column  = clust_column,
        input_umap    = ref_umap,
        umap_color    = umap_col,
        box_colors    = fig_cols,
        group         = group,
        umap_mtplyr   = umap_mtplyr,
        xlsx_name     = xlsx_name,
        sheet_name    = clust,
        ...
      )
    
    cat(nrow(fig_marks), "marker genes were identified.", nrow(fig_GO), "GO terms were identified.")
    print(marker_fig)
    cat("\n\n---\n\n<br>\n\n<br>\n\n")
  }
}


# Default chunk options
knitr::opts_chunk$set(message = F, warning = F)

# Load packages
R_packages <- c(
  "tidyverse",  "Seurat",
  "gprofiler2", "knitr",
  "cowplot",    "ggbeeswarm",
  "ggrepel",    "RColorBrewer",
  "xlsx",       "colorblindr",
  "ggforce",    "broom",
  "mixtools",   "clustifyr",
  "boot",       "scales",
  "patchwork",  "ComplexHeatmap",
  "devtools",   "broom",
  "presto",     "here"
)

purrr::walk(R_packages, library, character.only = T)


# ggplot2 themes
theme_info <- theme_cowplot() +
  theme(
    plot.title       = element_text(face = "plain", size = 20),
    strip.background = element_blank(),
    strip.text       = element_text(face = "plain")
  )

umap_theme <- theme_info +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank()
  )

blank_theme <- umap_theme +
  theme(
    axis.line  = element_blank(),
    axis.title = element_blank()
  )

# Legend guides
col_guide <- guide_legend(override.aes = list(size = 3.5, shape = 16))

outline_guide <- guide_legend(override.aes = list(
  size   = 3.5,
  shape  = 21,
  color  = "black",
  stroke = 0.25
))

# Base color palettes
base_cols <- c(
  "#225ea8",  # blue
  "#e31a1c",  # red
  "#238443",  # green
  "#ec7014",  # orange
  "#6a51a3",  # purple
  "#c51b7d",  # pink
  "#8c510a",  # brown
  "#217D87",  # teal, darken("#41b6c4", 0.3)
  "#F0E442",  # yellow, palette_OkabeIto[4]
  "#000000"   # black
)

base_cols_paired <- base_cols %>%
  map(~ {
    .x %>%
        lighten(0.25) %>%
        desaturate(0.2) %>%
        c(.x)
    })

names(base_cols_paired) <- base_cols

base_cols <- base_cols %>%
  lighten(0.25) %>%
  desaturate(0.2) %>%
  c(base_cols, .)

# Okabe Ito color palettes
ito_cols <- c(
  palette_OkabeIto[1:4], "#d7301f", 
  palette_OkabeIto[5:6], "#6a51a3", 
  palette_OkabeIto[7:8]
)

ito_cols <- ito_cols %>%
  darken(0.4) %>% 
  c(ito_cols, ., "#000000")

# Set color palette
theme_cols <- ito_cols


# Set paths/names
so_paths <- params$sobjs %>%
  pull_nest_vec("sobj_in") %>%
  map_chr(~ here(params$rds_dir, .x))

so_out <- params$sobjs %>%
  pull_nest_vec("sobj_out") %>%
  map_chr(~ here(params$rds_dir, .x))

so_types <- params$sobjs %>%
  pull_nest_vec("cell_type")

so_titles <- params$sobjs %>%
  pull_nest_vec("title")

# Load and subset objects
# Avoid loading same objects twice
if (!all(file.exists(so_out))) {
  
  sobjs <- unique(so_paths) %>%                           # Load objects
    setNames(., .) %>%
    map(read_rds)
  
  sobjs <- sobjs[match(so_paths, names(sobjs))]
  names(sobjs) <- names(so_paths)
  
  inc_types <- so_types %>%
    map(~ if_else(
      .x == "LEC",
      list(c("B cell", "T cell", "epithelial")),
      list(c("B cell", "T cell"))
    )) %>%
    flatten()
  
  sobjs <- imap(sobjs, subset_sobj, so_types, inc_types)  # Subset objects
  sobjs <- imap(sobjs, run_clustifyr, so_types)           # Run clustifyr
  
  # Combine CCR7hi XCR- Mig cDC2s and CCR7hi Mig cDC2s
  DC_idx <- so_types[names(sobjs)] == "DC"
  sobjs[DC_idx] <- sobjs[DC_idx] %>%
    map(~ {
      .x@meta.data <- .x@meta.data %>%
        rownames_to_column("cell_id") %>%
        mutate(subtype = if_else(subtype == "CCR7hi XCR1- Mig cDC2", "CCR7hi Mig cDC2", subtype)) %>%
        column_to_rownames("cell_id")
      
      .x
    })
  
  # Write new objects
  sobjs %>%
    iwalk(~ write_rds(.x, path = so_out[.y]))

} else {
  sobjs <- so_out %>%
    map(read_rds)
}

# Color palettes
common_cols <- c(
  "Epithelial" = "#6a51a3",
  "B Cell"     = "#E69F00",
  "T Cell"     = "#009E73",
  "Unassigned" = "#ffffff"
)

so_cols <- so_types %>%
  map(
    set_type_cols,
    sobjs_in   = sobjs,
    type_key   = so_types,
    cols_in    = ito_cols,
    other_cols = common_cols
  )
# Add one to variable
plus_one <- function(x, n = 1) {
  cmd <- str_c(x, " <<- ", x, " + ", n)
  
  eval(parse(text = cmd))
  eval(parse(text = x))
}

# Add arrows to axis
add_arrow_axis <- function(gg_in, fract = 0.1, ...) {
  
  get_line_coords <- function(range_in, fract) {
    mn  <- range_in[1]
    mx  <- range_in[2]
    dif <- mx - mn

    res <- c(mn - (dif * 0.05), mn + (dif * fract))

    res
  }
  
  x_coords <- ggplot_build(gg_in)$layout$panel_scales_x[[1]]$range$range %>%
    get_line_coords(fract = fract)

  y_coords <- ggplot_build(gg_in)$layout$panel_scales_y[[1]]$range$range %>%
    get_line_coords(fract = fract)
   
  res <- gg_in +
    geom_segment(
      x        = x_coords[1],
      xend     = x_coords[2], 
      y        = y_coords[1], 
      yend     = y_coords[1],
      size     = 0.25,
      color    = "black",
      linejoin = "bevel",
      arrow    = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
      ...
    ) +
    geom_segment(
      y        = y_coords[1], 
      yend     = y_coords[2], 
      x        = x_coords[1], 
      xend     = x_coords[1], 
      size     = 0.25,
      color    = "black",
      linejoin = "bevel",
      arrow    = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
      ...
    ) +
    theme(axis.title = element_text(hjust = 0, size = 10))
  
  res
}

# Set equal x-axis scales
equalize_x <- function(gg_list_in, log_tran = T, ...) {
  
  set_lims <- function(gg_in, min_x, max_x, log_tran, ...) {
    res <- gg_in +
      coord_cartesian(xlim = c(min_x, max_x))
    
    if (log_tran) {
      res <- res +
        scale_x_log10(labels = trans_format("log10", math_format(10^.x)), ...)
    }
    
    res
  }
  
  gg_ranges <- gg_list_in %>%
    map(~ ggplot_build(.x)$layout$panel_scales_x[[1]]$range$range)
  
  min_val <- gg_ranges %>%
    map_dbl(~ .x[1]) %>%
    min()
  
  max_val <- gg_ranges %>%
    map_dbl(~ .x[2]) %>%
    max()
  
  res <- gg_list_in %>%
    map(
      set_lims,
      min_x    = min_val, 
      max_x    = max_val, 
      log_tran = log_tran,
      ...
    )
  
  res
}

# Function to create figure panel
create_fig3 <- function(sobj_in, cols_in, subtype_column = "subtype", data_slot = "counts",
                        box_counts = c("Relative ova signal" = "ova_fc"), umap_counts = c("ova counts" = "adt_ovalbumin"), 
                        plot_title = NULL, arrow_axis = F, pt_size = 0.1, pt_outline = 0.4, pt_size_2 = 0.3, pt_outline_2 = 0.5,
                        umap_cell_count = F, box_cell_count = T, control_types = c("B Cell", "T Cell"), ...) {
  
  box_column <- umap_column <- "cell_type"
  box_cols <- umap_cols <- cols_in
  
  # Fetch plotting data
  data_df <- sobj_in %>%
    FetchData(c(subtype_column, box_counts, umap_counts, "UMAP_1", "UMAP_2"), slot = data_slot) %>%
    as_tibble(rownames = "cell_id")
  
  # Add pseudo count
  if (0 %in% pull(data_df, box_counts)) {
    data_df <- data_df %>%
      mutate(
        pseudo = ifelse(!!sym(box_counts) > 0, !!sym(box_counts), NA),
        pseudo = min(pseudo, na.rm = T) * 0.5,
        !!sym(box_counts) := !!sym(box_counts) + pseudo
      )
  }
  
  if (!is.null(names(box_counts))) {
    data_df <- data_df %>%
      rename(!!box_counts)
    
    box_counts <- names(box_counts)
  }
  
  if (!is.null(names(umap_counts))) {
    data_df <- data_df %>%
      rename(!!umap_counts)
    
    umap_counts <- names(umap_counts)
  }
  
  # Set subtype order
  # Move select cell types to front of order
  data_df <- data_df %>%
    mutate(
      cell_type = !!sym(subtype_column),
      cell_type = fct_reorder(cell_type, !!sym(box_counts), median)
    )
  
  type_order <- levels(data_df$cell_type)
  
  if (!is.null(control_types)) {
    control_types <- control_types[control_types %in% type_order]
    type_order <- type_order[!type_order %in% control_types]
    type_order <- c(control_types, type_order)
  }
  
  # Count cells for each subtype
  data_df <- data_df %>%
    group_by(cell_type) %>%
    mutate(cell_count = n_distinct(cell_id)) %>%
    ungroup() %>%
    mutate(cell_type = fct_relevel(cell_type, type_order)) %>%
    arrange(cell_type) %>%
    mutate(
      cell_count = str_c(cell_type, "\n(n = ", cell_count, ")"),
      cell_count = fct_inorder(cell_count)
    )
  
  # Set cell type colors
  names(type_order) <- levels(data_df$cell_count)
  
  cols_df <- tibble(
    cell_type = type_order,
    cell_count = names(type_order)
  )
  
  cols_df <- cols_df %>%
    mutate(color = cols_in[cell_type])
  
  # Subtype UMAP
  if (umap_cell_count) {
    umap_column <- "cell_count"
    umap_cols <- setNames(cols_df$color, cols_df$cell_count)
  }
  
  umap <- data_df %>%
    plot_features(
      feature     = umap_column,
      pt_size     = pt_size,
      pt_outline  = pt_outline,
      plot_cols   = umap_cols,
      feat_levels = rev(names(umap_cols))
    ) +
    guides(color = guide_legend(override.aes = list(size = 3.5))) +
    ggtitle(plot_title) +
    blank_theme +
    theme(
      plot.title = element_text(size = 12),
      legend.position = "none"
    ) +
    theme(...)
  
  if (arrow_axis) {
    umap <- add_arrow_axis(umap)
  }
  
  # OVA UMAP
  if (!is.null(umap_counts)) {
    ova_umap <- data_df %>%
      plot_features(
        feature    = umap_counts,
        plot_cols  = c("#fafafa", "#d7301f"),
        pt_size    = pt_size_2,
        pt_outline = pt_outline_2,
        min_pct    = 0.01,
        max_pct    = 0.99
      ) +
      blank_theme +
      theme(
        plot.title        = element_text(size = 10, hjust = 0.5),
        legend.position   = "right",
        legend.key.width  = unit(0.15, "cm"),
        legend.key.height = unit(0.30, "cm"),
        legend.title      = element_text(size = 10),
        legend.text       = element_text(size = 10)
      )
    
    if (arrow_axis) {
      ova_umap <- add_arrow_axis(ova_umap)
    }
  }
  
  # OVA boxes
  if (box_cell_count) {
    box_column <- "cell_count"
    box_cols <- setNames(cols_df$color, cols_df$cell_count)
  }
  
  boxes <- data_df %>%
    ggplot(aes(!!sym(box_counts), !!sym(box_column), fill = !!sym(box_column))) +
    geom_violin(size = 0.3, draw_quantiles = c(0.25, 0.75), alpha = 0.75) +
    stat_summary(geom = "point", color = "black", fun = median) +
    scale_color_manual(values = box_cols) +
    scale_fill_manual(values = box_cols) +
    theme_minimal_vgrid() +
    theme(
      legend.position    = "none",
      axis.title.y       = element_blank(),
      axis.title         = element_text(size = 10),
      axis.text          = element_text(size = 10),
      axis.ticks.x       = element_line(size = 0.1),
      panel.grid.major.x = element_line(size = 0.1)
    )
  
  res <- list(umap, boxes)
  
  if (!is.null(umap_counts)) {
    res <- append(res, list(ova_umap))
  }
  
  names(res) <- rep(plot_title, length(res))
  
  res
}

# Plot correlation
plot_corr <- function(sobj_in, x, y, feat, data_slot, cols_in, plot_title, ...) {
  res <- sobj_in %>%
    FetchData(c(x, y, feat), slot = data_slot) %>%
    mutate(
      !!sym(x) := log10(!!sym(x)),
      !!sym(y) := log10(!!sym(y))
    ) %>%
    filter(
      !!sym(x) != -Inf,
      !!sym(y) != -Inf
    ) %>%
    plot_features(
      x         = x,
      y         = y,
      feature   = feat,
      data_slot = "counts",
      plot_cols = cols_in,
      lab_pos   = c(0.9, 1),
      lab_size  = 5,
      calc_cor  = T,
      lm_line   = T,
      ...
    ) +
    ggtitle(plot_title) +
    guides(color = guide_legend(override.aes = list(size = 3.5))) +
    theme_info +
    theme(legend.title = element_blank())
  
  res
}

# Calculate pairwise p-values for gg objects
calc_p_vals <- function(gg_in, sample_name, data_column, type_column, log_tran = T) {
  
  # Pull data from gg object
  gg_data <- gg_in$data
  
  # Log transform
  if (log_tran) {
    gg_data <- gg_data %>%
      mutate(!!sym(data_column) := log10(!!sym(data_column)))
  }
  
  # Calculate median
  gg_stats <- gg_data %>%
    group_by(!!sym(type_column)) %>%
    summarize(med = median(!!sym(data_column)))
  
  # Run wilcox test
  gg_counts <- gg_data %>%
    pull(data_column)
  
  gg_groups <- gg_data %>%
    pull(type_column)
  
  res <- gg_counts %>%
    pairwise.wilcox.test(
      g = gg_groups, 
      p.adj = "bonf"
    ) %>%
    tidy()
  
  # Add medians to data.frame
  res <- gg_stats %>%
    rename(med_1 = med) %>%
    right_join(res, by = c("cell_type" = "group1"))
  
  res <- gg_stats %>%
    rename(med_2 = med) %>%
    right_join(res, by = c("cell_type" = "group2"))
  
  # Format final table
  res <- res %>%
    mutate(Sample = sample_name) %>%
    select(
      Sample,
      `Cell type 1`             = str_c(type_column, ".y"),
      `Median OVA FC 1 (log10)` = med_1,
      `Cell type 2`             = type_column,
      `Median OVA FC 2 (log10)` = med_2,
      p.value
    )
  
  res
}


# Parameter lists
fig3_names <- c("d2_DC", "d14_DC", "d14_LEC", "d14_FRC")
fig3_sobjs <- sobjs[fig3_names]

fig3_params <- list(
  sobj_in    = fig3_sobjs, 
  plot_title = so_titles[names(fig3_sobjs)],
  cols_in    = so_cols[names(fig3_sobjs)]
)

corr_params <- list(
  sobj_in    = sobjs, 
  plot_title = so_titles[names(sobjs)], 
  cols_in    = so_cols[names(sobjs)]
)

n_plot <- 0

Supplemental 2

Clustering for LEC subtype assignment using Xiang et al. reference

# Load reference
load(here(params$sobj_refs$LEC))

so_in <- sobjs[["d14_LEC"]] %>%
  FindNeighbors(
    reduction = "pca",
    dims = 1:30
  )

# Get subtypes for different resolutions/thresholds
resln <- c(0.6, 1, 1.4, 1.8, 3)
r <- seq(0.5, 0.8, 0.1)

res_df <- expand.grid(resln, r) %>%
  rename(resln = Var1, r_thresh = Var2) %>%
  as_tibble()

plot_df <- map2(res_df$resln, res_df$r_thresh, ~ {
  res <- so_in %>%
    FindClusters(
      resolution = .x,
      verbose    = F
    ) %>%
    clustify(
      ref_mat     = ref_LEC_xiang,
      cluster_col = "seurat_clusters",
      threshold   = .y
    )
  
  res <- res@meta.data %>%
    as_tibble(rownames = "cell_id") %>%
    mutate(
      subtype  = if_else(cell_type == "LEC", type.clustify, subtype),
      subtype  = str_to_title_v2(subtype),
      UMAP_1   = UMAP_1.x,
      UMAP_2   = UMAP_2.x,
      n_clust  = n_distinct(seurat_clusters),
      r        = r.clustify,
      r_thresh = .y
    ) %>%
    select(
      cell_id, orig.ident, seurat_clusters, 
      cell_type, subtype, n_clust, 
      r, r_thresh, UMAP_1, UMAP_2
    )
  
  res
}) %>%
  bind_rows()

# Subtype UMAPs
subtype_gg <- plot_df %>%
  plot_features(
    feature    = "subtype",
    pt_outline = 0.5,
    pt_size    = 0.05,
    plot_cols  = so_cols[["d14_LEC"]], 
    split_id   = c("n_clust", "r_thresh"), 
    switch     = "y"
  ) +
  blank_theme +
  labs(subtitle = "Clustifyr Threshold", y = "Number of Clusters") +
  guides(color = outline_guide) +
  blank_theme +
  theme(
    plot.subtitle = element_text(hjust = 0.5),
    panel.border  = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title  = element_blank(),
    axis.title.y  = element_text()
  )

# Reference UMAPs
clust_guide <- outline_guide
clust_guide$ncol <- 1

ref_gg <- plot_df %>%
  mutate(r_thresh = "Clusters") %>%
  unique() %>%
  plot_features(
    feature    = "seurat_clusters",
    pt_outline = 0.5,
    pt_size    = 0.05,
    split_id   = c("n_clust", "r_thresh"), 
    switch     = "y"
  ) +
  blank_theme +
  labs(y = "Number of Clusters") +
  guides(fill = clust_guide) +
  blank_theme +
  theme(
    plot.subtitle = element_text(hjust = 0.5),
    panel.border  = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title  = element_blank(),
    axis.title.y  = element_text()
  )

# Correlation UMAPs
cor_df <- plot_df %>%
  mutate(r_thresh = "Correlation") %>%
  unique()

cor_gg <- cor_df %>%
  ggplot(aes(UMAP_1, UMAP_2, color = r))

walk(unique(cor_df$seurat_clusters), ~ {
  pt_1 <- cor_df %>%
    filter(
      seurat_clusters == .x,
      r <= 0.6
    )
  
  pt_2 <- cor_df %>%
    filter(
      seurat_clusters == .x,
      r > 0.6
    )
  
  cor_gg <<- cor_gg +
    geom_point(data = pt_1, color = "black", size = 0.5) +
    geom_point(data = pt_1, color = "white", size = 0.05) +
    geom_point(data = pt_2, color = "black", size = 0.5) +
    geom_point(data = pt_2, size = 0.05)
})

cor_gg <- cor_gg +
  facet_grid(n_clust ~ r_thresh, switch = "y") +
  labs(y = "Number of Clusters") +
  blank_theme +
  theme(
    panel.border = element_rect(fill = NA, color = "black", size = 0.2),
    legend.title = element_blank(),
    axis.title.y = element_text()
  )

# Create final figure
plot_grid(
  ref_gg, cor_gg, subtype_gg,
  rel_widths = c(0.4, 0.4, 1),
  nrow  = 1,
  align = "vh",
  axis  = "tbrl"
)